{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import neattext.functions as nfx\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "import necessary models and transformars\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report,confusion_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Loading the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='Emotion', ylabel='count'>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZnklEQVR4nO3deZRmdX3n8fdHEFmUvYfBhqQZ7dGgiQotohhFUESigVFQVKRBJowjaNSYiKNHCIjikkGRkQQDsmhExAVcInYaQYOyNKCsQTosAoelZRNE0Ibv/HF/RT80VU317ap6KOv9Ouc5de/vbr9769b9PHf7VaoKSZL6eNKwKyBJmr4MEUlSb4aIJKk3Q0SS1JshIknqbfVhV2CqbbzxxjVnzpxhV0OSpo2LLrroV1U1a7RhMy5E5syZw6JFi4ZdDUmaNpLcMNYwL2dJknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknqbcW+sT1e/PPRPh10F/ugjlw27CpKeYDwTkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktTbpIVIkuOT3J7k8oGyDZMsSHJN+7lBK0+So5IsTnJpkq0Gppnfxr8myfyB8q2TXNamOSpJJmtdJEmjm8wzkROAnZcrOwhYWFVzgYWtH+A1wNz22R84BrrQAQ4GXgRsAxw8EjxtnL8amG75ZUmSJtmkhUhV/Qi4c7niXYETW/eJwG4D5SdV5zxg/SSbAq8GFlTVnVV1F7AA2LkNW7eqzquqAk4amJckaYpM9T2RTarqltZ9K7BJ654N3Dgw3k2tbEXlN41SPqok+ydZlGTRkiVLVm0NJEmPGNqN9XYGUVO0rGOral5VzZs1a9ZULFKSZoSpDpHb2qUo2s/bW/nNwOYD423WylZUvtko5ZKkKTTVIXIGMPKE1Xzg9IHyvdtTWtsC97TLXmcCOyXZoN1Q3wk4sw37dZJt21NZew/MS5I0RVafrBkn+QqwPbBxkpvonrI6Ajg1yX7ADcAb2+jfA3YBFgP3A/sCVNWdSQ4DLmzjHVpVIzfr30n3BNhawL+2jyRpCk1aiFTVm8cYtOMo4xZwwBjzOR44fpTyRcBzV6WOkqRV4xvrkqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6m0oIZLkvUmuSHJ5kq8kWTPJFknOT7I4yVeTrNHGfUrrX9yGzxmYzwdb+dVJXj2MdZGkmWzKQyTJbODdwLyqei6wGrAn8AngyKp6JnAXsF+bZD/grlZ+ZBuPJFu26Z4D7Ax8PslqU7kukjTTDety1urAWklWB9YGbgF2AE5rw08Edmvdu7Z+2vAdk6SVn1JVD1bVdcBiYJupqb4kCYYQIlV1M/Bp4Jd04XEPcBFwd1UtbaPdBMxu3bOBG9u0S9v4Gw2WjzLNoyTZP8miJIuWLFkysSskSTPYMC5nbUB3FrEF8HRgHbrLUZOmqo6tqnlVNW/WrFmTuShJmlGGcTnrlcB1VbWkqn4PfAPYDli/Xd4C2Ay4uXXfDGwO0IavB9wxWD7KNJKkKTCMEPklsG2Stdu9jR2BK4EfAru3ceYDp7fuM1o/bfhZVVWtfM/29NYWwFzggilaB0kS3Q3uKVVV5yc5DbgYWApcAhwLfBc4JclHW9lxbZLjgJOTLAbupHsii6q6IsmpdAG0FDigqh6a0pWRpBluykMEoKoOBg5ervhaRnm6qqoeAPYYYz6HA4dPeAUlSePiG+uSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb0P597j6w7Td57YbdhUAOPdd5w67CtKM4ZmIJKk3Q0SS1JshIknqzRCRJPVmiEiSehtXiCRZOJ4ySdLMssJHfJOsCawNbJxkAyBt0LrA7EmumyTpCe7x3hP5X8B7gKcDF7EsRH4NHD151ZIkTQcrDJGq+izw2STvqqrPTVGdJEnTxLjuiVTV55K8JMlbkuw98um70CTrJzktyX8kuSrJi5NsmGRBkmvazw3auElyVJLFSS5NstXAfOa38a9JMr9vfSRJ/Yz3xvrJwKeBlwIvbJ95q7DczwLfr6pnA88DrgIOAhZW1VxgYesHeA0wt332B45pddoQOBh4EbANcPBI8EiSpsZ4286aB2xZVbWqC0yyHvAyYB+Aqvod8LskuwLbt9FOBM4GPgDsCpzUln1eO4vZtI27oKrubPNdAOwMfGVV6yhJGp/xvidyOfBfJ2iZWwBLgC8muSTJPydZB9ikqm5p49wKbNK6ZwM3Dkx/Uysbq/wxkuyfZFGSRUuWLJmg1ZAkjTdENgauTHJmkjNGPj2XuTqwFXBMVb0A+A3LLl0B0M46VvmsZ2B+x1bVvKqaN2vWrImarSTNeOO9nHXIBC7zJuCmqjq/9Z9GFyK3Jdm0qm5pl6tub8NvBjYfmH6zVnYzyy5/jZSfPYH1lCQ9jnGFSFWdM1ELrKpbk9yY5FlVdTWwI3Bl+8wHjmg/T2+TnAEcmOQUupvo97SgORP42MDN9J2AD05UPSVJj29cIZLkXpZdXloDeDLwm6pat+dy3wV8OckawLXAvnSX1k5Nsh9wA/DGNu73gF2AxcD9bVyq6s4khwEXtvEOHbnJLkmaGuM9E3naSHeS0D0xtW3fhVbVzxj9EeEdRxm3gAPGmM/xwPF96yFJWjUr3Ypvdb4FvHriqyNJmk7Geznr9QO9T6I7i3hgUmokSZo2xvt01usGupcC19Nd0pIkzWDjvSey72RXRJI0/Yy37azNknwzye3t8/Ukm0125SRJT2zjvZz1ReBfgD1a/16t7FWTUSlpMp3zspcPuwoAvPxHE/b6lTQ04306a1ZVfbGqlrbPCYDth0jSDDfeELkjyV5JVmufvYA7JrNikqQnvvGGyNvp3iC/FbgF2J3WlLskaeYa7z2RQ4H5VXUXPPIPoT5NFy6SpBlqvGcifzYSINC1WwW8YHKqJEmaLsYbIk8a/Nez7UxkvGcxkqQ/UOMNgn8Afprka61/D+DwyamSJGm6GO8b6yclWQTs0IpeX1VXTl61JEnTwbgvSbXQMDgkSY9Y6abgJUkaYYhIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvdmcu6RVcvheuw+7CnzoS6cNuwozlmcikqTeDBFJUm9DC5EkqyW5JMl3Wv8WSc5PsjjJV5Os0cqf0voXt+FzBubxwVZ+dZJXD2lVJGnGGuaZyF8DVw30fwI4sqqeCdwF7NfK9wPuauVHtvFIsiWwJ/AcYGfg80lWm6K6S5IY0o31JJsBf0H3L3bflyR0/zXxLW2UE4FDgGOAXVs3wGnA0W38XYFTqupB4Loki4FtgJ9O0WpIk+rov/n2sKsAwIH/8LphV0FPYMM6E/kM8HfAw61/I+Duqlra+m8CZrfu2cCNAG34PW38R8pHmeZRkuyfZFGSRUuWLJnA1ZCkmW3KQyTJa4Hbq+qiqVpmVR1bVfOqat6sWbOmarGS9AdvGJeztgP+MskuwJrAusBngfWTrN7ONjYDbm7j3wxsDtyUZHVgPeCOgfIRg9NIkqbAlJ+JVNUHq2qzqppDd2P8rKp6K/BDYOStpfnA6a37jNZPG35WVVUr37M9vbUFMBe4YIpWQ5LEE+uN9Q8ApyT5KHAJcFwrPw44ud04v5MueKiqK5KcClwJLAUOqKqHpr7akjRzDTVEqups4OzWfS3d01XLj/MAsMcY0x9O94SXJGkIfGNdktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1tvqwKzBsW//tScOuAhd9au9hV0GSepnyM5Ekmyf5YZIrk1yR5K9b+YZJFiS5pv3coJUnyVFJFie5NMlWA/Oa38a/Jsn8qV4XSZrphnE5aynwN1W1JbAtcECSLYGDgIVVNRdY2PoBXgPMbZ/9gWOgCx3gYOBFwDbAwSPBI0maGlMeIlV1S1Vd3LrvBa4CZgO7Aie20U4EdmvduwInVec8YP0kmwKvBhZU1Z1VdRewANh56tZEkjTUG+tJ5gAvAM4HNqmqW9qgW4FNWvds4MaByW5qZWOVj7ac/ZMsSrJoyZIlE7cCkjTDDS1EkjwV+Drwnqr69eCwqiqgJmpZVXVsVc2rqnmzZs2aqNlK0ow3lBBJ8mS6APlyVX2jFd/WLlPRft7eym8GNh+YfLNWNla5JGmKDOPprADHAVdV1f8dGHQGMPKE1Xzg9IHyvdtTWtsC97TLXmcCOyXZoN1Q36mVSZKmyDDeE9kOeBtwWZKftbL/AxwBnJpkP+AG4I1t2PeAXYDFwP3AvgBVdWeSw4AL23iHVtWdU7IGkiRgCCFSVf8OZIzBO44yfgEHjDGv44HjJ652kqSVYbMnkqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvQ3jPxtKksZwyCGHDLsKK1UHz0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqbdp3wBjkp2BzwKrAf9cVUcMuUqSnoCuOvysYVeBP/nQDsOuwoSb1mciSVYD/h/wGmBL4M1JthxurSRp5pjWIQJsAyyuqmur6nfAKcCuQ66TJM0Yqaph16G3JLsDO1fV/2z9bwNeVFUHLjfe/sD+rfdZwNUTXJWNgV9N8Dwn2nSoI1jPiWY9J9Z0qOdk1PGPq2rWaAOm/T2R8aiqY4FjJ2v+SRZV1bzJmv9EmA51BOs50aznxJoO9ZzqOk73y1k3A5sP9G/WyiRJU2C6h8iFwNwkWyRZA9gTOGPIdZKkGWNaX86qqqVJDgTOpHvE9/iqumIIVZm0S2UTaDrUEaznRLOeE2s61HNK6zitb6xLkoZrul/OkiQNkSEiSerNEJkASeYkeUvPae+bhPr8ZKLnOdHaNrt82PUYTZJ3J7kqyZeHXZfJkuR7SdYfdj2GLcn1STYedj3GkuSQJO9PcmiSV07B8nZb2VY/DJGJMQcYNUSSTPnDC1X1kqle5h+YdwKvqqq39p3BVP/ex7u8dJ5UVbtU1d2TXK1JNbIuw67HVKiqj1TVv03Bonaja0Jq3GbEL2As7dvwVUm+kOSKJD9IslaSZyT5fpKLkvw4ybPb+Ce0t+RHph85izgC+PMkP0vy3iT7JDkjyVnAwiRPTbIwycVJLksyqU2zJLmv/YF9KsnlbZlvasNOSrLbwLhfXpX6JFknyXeT/Lwt601JPpLkwtZ/bJK0cbdu4/0cOGBgHvsk+Ubb5tck+eTAsJ2S/LRtu68leWorPyLJlUkuTfLpVrZHW+bPk/yo5/r8I/DfgH9N8qEkxye5IMklI9up7Tc/bnW6OMlLWvn2rfwM4MoJ3J6PfFtOMi/J2a37kCQnJzkXOLltx9OTnN2248ED9b06yUnA5cDmI/McbXltmq2TnNP+Bs5MsulKrMO32nRXpGstYmSfPLwt57wkm7TyZ7T+y5J8dOBviiR/2/ajS5P8/Vjrsirbtg16V5b9bY78rW/T9rtLkvwkybNa+T5t/Ra0bXhgkve18c5LsuHAej3mGDLOen4oyS+S/DtdCxuPOvaMse+Puh3bPvmdgXkfnWSf0ebT9uO/BD6V7lj2jHFVuKpm7IfuDGIp8PzWfyqwF7AQmNvKXgSc1bpPAHYfmP6+9nN74DsD5fsANwEbtv7VgXVb98bAYpY9GXffJKzXfcAbgAV0jz5vAvwS2BR4OfCtNt56wHXA6quwrDcAXxjoX29kvVv/ycDrWvelwMta96eAywe217Vt2jWBG+gODhsDPwLWaeN9APgIsBFd0zUj23D99vMyYPZgWc91ur4t+2PAXiPzA34BrAOsDazZyucCiwb2g98AW0zw9rwe2Lj1zwPObt2HABcBaw1sx1va9lmL7iA7j24/fxjYdpR1HG15TwZ+AsxqZW+ie3x+vOswst+P1GEjoAb2g08CH27d3wHe3LrfwbK/qZ3oHlUN3Zfd7wAvG21dJmDbvqv1v5OuJXCAdWl/F8Arga8PbOPFwNOAWcA9wDvasCOB97TuUY8h46jj1nT78dqtDouB99OOPYy974+1Hbfn0cemo9s6jDWfExg4xo3nM6PPRJrrqupnrfsiup30JcDXkvwM+Ce6g+/KWlBVd7buAB9Lcinwb8BsugP7ZHop8JWqeqiqbgPOAV5YVefQvaA5C3gz3R/H0lVYzmXAq5J8IsmfV9U9wCuSnJ/kMmAH4Dnprr+vX1UjZwgnLzefhVV1T1U9QPct/o+BbelOrc9tv4v5rfwe4AHguCSvB+5v8zgXOCHJX9GF56raCTioLftsuoD7I7qD7Bfa+n2NR5/+X1BV163CMkfbnityRlX9dqB/QVXd0cq+QbcfANxQVeeNc3nPAp4LLGjr/mG61iDG693pzjbPo/syMBf4Hd2BDpb9nQG8mG4bAvzLwDx2ap9LgIuBZ7f5rGhdHs9Y2/Ybo9RrPbpjwOV04fCcgfn8sKruraoldPvitwfmPyfd2XLfY8ifA9+sqvur6tc89uXpsfb9sbbjWMaaz0qb1i8bTpAHB7ofoju4311Vzx9l3KW0S4DprsWusYL5/mag+61031q2rqrfJ7me7oA0LCfRnXHtCey7KjOqql8k2QrYBfhokoV0l6rmVdWNSQ5hfOu6/O9hdbrwXVBVb15+5CTbADvSfTs7ENihqt6R5EXAXwAXJdm6qu5YhdUL8IaqelSDnW2dbgOeR7c/PDAwePD3vtLG2J6P7Hc8dlsuv7zlX/yqMcZb0fK+CVxRVS9e2fon2Z7um/uLq+r+dJfe1gR+X+2rLst+vyucFfDxqvqn5eY/Z6x1eTxjrCss2/cG63UYXVj8j7bMswdmNbivPjzQ/3Cb/kmMfQxZJdW9YP2YfX8FkwzuO9D2nx7zGZNnIo/1a+C6JHvAIzfvnteGXU93ugndtcMnt+576U5vx7IecHsLkFfQfZuebD8G3pRktXbW8TLggjbsBOA9AFXV69r9iCRPB+6vqi/RXaLaqg36VftGtntbzt3A3UlGvhmP56b1ecB2SZ7ZlrVOkv/e5rteVX0PeC/dwZwkz6iq86vqI8ASVuJ6+RjOpLtePnJP5wWtfD3glqp6GHgbE3PWQ1vGaNvzepbtd294nFm8KsmGSdaiu0l6bo/lXQ3MSvLiNs6TkzxnBbMZtB5wVwuQZ9OdTa7IeSxbpz0Hys8E3p5l98BmJ/kv46zDqFawr45mPZa1w7fPyiynnUGMdQx5PD8Cdkt3b/ZpwOsGB4617zP2drwB2DLJU9rVgB0fZz6Pdyx7DM9ERvdW4JgkH6YLilOAnwNfAE5vp+rfZ9k3okuBh1r5CcBdy83vy8C32+WPRcB/THL9i+7b5ItbvQv4u6q6FaCqbktyFfCtCVjWn9LdiHsY+D3wv+kOXpcDt9K1bzZiX+D4JAX84HFXompJuwn4lSRPacUfptvRT0+yJt031ve1YZ9KMreVLaRb91VxGPAZ4NJ25nkd8Frg88DXk+zNo/eDiTDa9lyL7rLDYTz6G/FoLgC+Tnf56UtVtah9kx738qrqd+lu4h6VZD2648RngPE0KfR94B1t/7qa7uC2Iu8BvpTkQ23aewCq6gdJ/gT4acvw++jOnh8aRx3GMtq2PW2McT8JnNiOAd/tsayxjiErVFUXJ/lqG/d2Hv33A90BfrR9/z2Mvh1vTHIq3d/jdXSXB1c0n1PoLtW+m+7eyH8+Xp1t9uQPTJKNgIurasyznSRr012/3Woc19w1TbTAnVfL/T+dJ7K2L/62qirJnnQ3h/3HcitpmNvRM5E/IO10/Wzg0ysY55XAccCRBoieALYGjm6XDO8G3j7c6kxbQ9uOnolIknrzxrokqTdDRJLUmyEiSerNEJF6SvJQujaGRj4HTcA8H9UidLq2so5a1flKk8Ub61JPSe6rqqdO8Dy3B95fVa+dyPlKk8UzEWmCpWvd9ePt7GRRkq3StYT7n0ne0cZJRmllmce2CP1IK6ztTfRvpWt19bwkf9bKD0nX2vDZSa5tL4pJU8L3RKT+1krXwN6Ij1fVV1v3L6vq+UmOpGvFYDu6dosuB/4ReD3wfLrmJjYGLkzXfP1BDJyJtDOTEX8PXFJVuyXZga4NtOe3Yc8GXkH3JvLVSY6pqt9P5MpKozFEpP5+u4JG9kZaX70MeGpV3Qvcm+TB1obRI60sA7clOQd4IV3bbWN5Ka19pKo6K8lGSdZtw75bVQ8CDya5na4h0ZtWYd2kcfFyljQ5Blt2Xb7V18n48jZaK8jSpDNEpOEYq5XlFbWi+mNa68ftMtevWoux0tD4bUXqb/l7It+vqvE+5jtqK8tJ7uDRLUJfMjDNIXStIF9K90+E5q9a9aVV5yO+kqTevJwlSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqbf/D+DvDmX+uXoeAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df=pd.read_csv(\"data/emotion_dataset_2.csv\")\n",
    "sns.countplot(x='Emotion', data=df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data Cleaning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Emotion</th>\n",
       "      <th>Text</th>\n",
       "      <th>Clean_Text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>neutral</td>\n",
       "      <td>Why ?</td>\n",
       "      <td>?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>joy</td>\n",
       "      <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
       "      <td>Sage Act upgrade list tommorow.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>sadness</td>\n",
       "      <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
       "      <td>WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>joy</td>\n",
       "      <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
       "      <td>eye ! true hazel eye-and brilliant ! Regular f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>joy</td>\n",
       "      <td>@Iluvmiasantos ugh babe.. hugggzzz for u .!  b...</td>\n",
       "      <td>ugh babe.. hugggzzz u .! babe naamazed nga ako...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34787</th>\n",
       "      <td>34787</td>\n",
       "      <td>surprise</td>\n",
       "      <td>@MichelGW have you gift! Hope you like it! It'...</td>\n",
       "      <td>gift! Hope like it! hand wear ! It'll warm! Lol</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34788</th>\n",
       "      <td>34788</td>\n",
       "      <td>joy</td>\n",
       "      <td>The world didnt give it to me..so the world MO...</td>\n",
       "      <td>world didnt me..so world DEFINITELY cnt away!!!</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34789</th>\n",
       "      <td>34789</td>\n",
       "      <td>anger</td>\n",
       "      <td>A man robbed me today .</td>\n",
       "      <td>man robbed today .</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34790</th>\n",
       "      <td>34790</td>\n",
       "      <td>fear</td>\n",
       "      <td>Youu call it JEALOUSY, I call it of #Losing YO...</td>\n",
       "      <td>Youu JEALOUSY, #Losing YOU...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34791</th>\n",
       "      <td>34791</td>\n",
       "      <td>sadness</td>\n",
       "      <td>I think about you baby, and I dream about you ...</td>\n",
       "      <td>think baby, dream time</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>34792 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0   Emotion  \\\n",
       "0               0   neutral   \n",
       "1               1       joy   \n",
       "2               2   sadness   \n",
       "3               3       joy   \n",
       "4               4       joy   \n",
       "...           ...       ...   \n",
       "34787       34787  surprise   \n",
       "34788       34788       joy   \n",
       "34789       34789     anger   \n",
       "34790       34790      fear   \n",
       "34791       34791   sadness   \n",
       "\n",
       "                                                    Text  \\\n",
       "0                                                 Why ?    \n",
       "1        Sage Act upgrade on my to do list for tommorow.   \n",
       "2      ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...   \n",
       "3       Such an eye ! The true hazel eye-and so brill...   \n",
       "4      @Iluvmiasantos ugh babe.. hugggzzz for u .!  b...   \n",
       "...                                                  ...   \n",
       "34787  @MichelGW have you gift! Hope you like it! It'...   \n",
       "34788  The world didnt give it to me..so the world MO...   \n",
       "34789                           A man robbed me today .    \n",
       "34790  Youu call it JEALOUSY, I call it of #Losing YO...   \n",
       "34791  I think about you baby, and I dream about you ...   \n",
       "\n",
       "                                              Clean_Text  \n",
       "0                                                      ?  \n",
       "1                        Sage Act upgrade list tommorow.  \n",
       "2      WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...  \n",
       "3      eye ! true hazel eye-and brilliant ! Regular f...  \n",
       "4      ugh babe.. hugggzzz u .! babe naamazed nga ako...  \n",
       "...                                                  ...  \n",
       "34787    gift! Hope like it! hand wear ! It'll warm! Lol  \n",
       "34788    world didnt me..so world DEFINITELY cnt away!!!  \n",
       "34789                                 man robbed today .  \n",
       "34790                      Youu JEALOUSY, #Losing YOU...  \n",
       "34791                             think baby, dream time  \n",
       "\n",
       "[34792 rows x 4 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir(nfx)\n",
    "# user Handles\n",
    "df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)\n",
    "df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Features and Labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xfeatures=df['Clean_Text']\n",
    "ylabels=df['Emotion']\n",
    "#train test split\n",
    "x_train,x_test,y_train,y_test=train_test_split(Xfeatures,ylabels,test_size=0.2,random_state=41)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Building Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "# a simple Logistic Regression Pipeline\n",
    "pipe_lr=Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression())])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train and Fit data in model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\personal\\NLP project\\emotion recognition\\venv\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  n_iter_i = _check_optimize_result(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.641471475786751"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe_lr.fit(x_train,y_train)\n",
    "pipe_lr.score(x_test,y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the model with pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "pipeline_file=open('emotion_classifier_pipe_lr_03_jan_2022.pkl',\"wb\")\n",
    "joblib.dump(pipe_lr,pipeline_file)\n",
    "pipeline_file.close()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "e660acd19f9beec01655d5bd5b63ad72ddc5c8c9f5501ea1952cbf7b1d7f5777"
  },
  "kernelspec": {
   "display_name": "Python 3.10.4 ('venv': venv)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
